# Clear workspace ----
# NOTE(review): this removes every object from the environment the script is
# run in. Consider dropping it if the script is ever source()d from a session
# that holds other work.
rm(list = ls())
library(bigrquery)
library(stringr)
library(tidyverse)
library(dplyr)
library(lme4)
library(MuMIn)
library(scales)
# Ask the gcloud CLI for the list of projects; `projectid` (used by
# create_dataset() for BigQuery queries) is parsed from this listing.
response <- try(
  system("~/google-cloud-sdk/bin/gcloud projects list --quiet", intern = TRUE)
)
# Console output captured from a run where the gcloud login had expired:
#> ERROR: (gcloud.projects.list) There was a problem refreshing your current auth tokens: ('invalid_grant: Bad Request', '{\n "error": "invalid_grant",\n "error_description": "Bad Request"\n}')
#> Please run:
#> $ gcloud auth login
#> to obtain new credentials.
#> If you have already logged in with a different account:
#> $ gcloud config set account ACCOUNT
#> to select an already authenticated account to use.
#> Warning: running command '~/google-cloud-sdk/bin/gcloud projects list --quiet' had status 1

# The project id is taken as the first space-delimited token on the third
# line of the listing.
# NOTE(review): which project ends up on line 3 depends on the gcloud output
# layout -- confirm this is the intended project.
# When the command failed or printed fewer than three lines there is nothing
# to parse: warn and fall back to NA so that runs relying only on the cached
# CSV files still work.
projectid <- if (inherits(response, "try-error") || length(response) < 3L) {
  warning(
    "Could not read a project id from `gcloud projects list`; ",
    "`projectid` is set to NA.",
    call. = FALSE
  )
  NA_character_
} else {
  strsplit(response[3], " ")[[1]][1]
}
# Download the hotspot metrics for one pool from BigQuery.
#
# The query template selects the pool-specific metric columns together with
# location, land-cover, population and city attributes from
# model.urban_hotspot joined to model2.all_species and model.urban_area.
# Every "##POOL_NAME##" marker in the template is replaced by `poolname`.
#
# @param poolname Name substituted for the "##POOL_NAME##" marker.
# @return The downloaded query result (value of bq_table_download()).
#
# Side effects: prints the final SQL text and runs the query in the project
# named by the global `projectid`.
create_dataset <- function(poolname) {
  pool_marker <- "##POOL_NAME##"
  query_template <- "SELECT
##POOL_NAME##.percentage_of_regional_pool_present,
##POOL_NAME##.difference_from_locality_trait_gravity,
##POOL_NAME##.percentage_of_niches_present,
##POOL_NAME##.percentage_of_niches_2_present,
##POOL_NAME##.percentage_of_niches_3_present,
latitude,
longitude,
percentage_landcover_5km.closed_forest_total AS closed_forest,
percentage_landcover_5km.cultivated,
percentage_landcover_5km.herbaceous_vegetation,
percentage_landcover_5km.herbaceous_wetland,
percentage_landcover_5km.open_forest_total AS open_forest,
percentage_landcover_5km.permanent_water,
percentage_landcover_5km.shrubs,
percentage_landcover_5km.urban,
percentage_landcover_5km.elevation.mean AS mean_elevation,
percentage_landcover_5km.elevation.delta AS elevation_delta,
average_population_density.within_5km AS average_population_density,
urban_area.name AS city_name,
urban_area.location.continent,
urban_area.ecosystem.realm,
urban_area.ecosystem.biome.biome_name AS biome,
urban_area.country_economy.gdp_estimate_thousand_dollars_per_person AS national_gdp_estimate_thousand_dollars_per_person,
urban_area.country_economy.income_group AS national_income_group,
locality_id,
number_of_checklists
FROM model.urban_hotspot
JOIN model2.all_species USING(locality_id, city_id)
JOIN model.urban_area USING (city_id)"

  pool_query <- str_replace_all(query_template, pool_marker, poolname)
  print(pool_query)

  # Run the query, then pull the result table into the R session.
  bq_table_download(bq_project_query(projectid, pool_query))
}
# Load the hotspot metrics for one pool, using a CSV file as a local cache.
#
# If "download_data__output__hotspot_metrics_<poolname>.csv" does not exist
# in the working directory it is first created from create_dataset(poolname).
# The data are then always read back from that file.
#
# Post-processing applied to the loaded table:
#   * city_name, continent, realm, biome and national_income_group become
#     factors; continent uses "Europe" and realm uses "Palearctic" as the
#     reference level.
#   * a `<column>_scaled` copy, rescaled to [0, 1] over the column's finite
#     range, is added for elevation, population density, GDP, latitude and
#     longitude.
#   * `absolute_latitude_scaled` is abs(latitude) rescaled to [0, 1].
#
# @param poolname Pool name; selects the cache file and the queried pool.
# @return The loaded table with the factor and `_scaled` columns added.
load_dataset <- function(poolname) {
  filename <- str_replace(
    "download_data__output__hotspot_metrics_##POOL_NAME##.csv",
    "##POOL_NAME##",
    poolname
  )
  if (!file.exists(filename)) {
    write_csv(create_dataset(poolname), filename)
  }
  data <- read_csv(filename)

  data$city_name <- as.factor(data$city_name)
  data$continent <- relevel(as.factor(data$continent), ref = "Europe")
  data$realm <- relevel(as.factor(data$realm), ref = "Palearctic")
  data$biome <- as.factor(data$biome)
  data$national_income_group <- as.factor(data$national_income_group)

  # Map a numeric vector onto [0, 1] using its finite, non-missing range.
  scale_to_unit <- function(x) {
    rescale(x, to = c(0, 1), from = range(x, na.rm = TRUE, finite = TRUE))
  }

  columns_to_scale <- c(
    "mean_elevation",
    "elevation_delta",
    "average_population_density",
    "national_gdp_estimate_thousand_dollars_per_person",
    "latitude",
    "longitude"
  )
  for (column in columns_to_scale) {
    data[[paste0(column, "_scaled")]] <- scale_to_unit(data[[column]])
  }

  # Take the absolute value BEFORE rescaling: latitude_scaled already lies in
  # [0, 1], so abs(latitude_scaled) would merely duplicate that column.
  data$absolute_latitude_scaled <- scale_to_unit(abs(data$latitude))

  data
}
# Load (and print) the hotspot metrics for the "merlin" and "birdlife" pools.
# Lines starting with `#>` are console output captured from an earlier run;
# they are kept as comments so the script stays parseable.
merlin <- load_dataset("merlin")
#> Rows: 8443 Columns: 26── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): city_name, continent, realm, biome, national_income_group, locality_id
#> dbl (20): percentage_of_regional_pool_present, difference_from_locality_trait_gravity, percentage_of_niches_pre...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
merlin

birdlife <- load_dataset("birdlife")
#> Rows: 8443 Columns: 26── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): city_name, continent, realm, biome, national_income_group, locality_id
#> dbl (20): percentage_of_regional_pool_present, difference_from_locality_trait_gravity, percentage_of_niches_pre...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
birdlife
library("PerformanceAnalytics")
# Console output captured when the package was attached:
#> Loading required package: xts
#> Loading required package: zoo
#> Attaching package: ‘zoo’
#> The following objects are masked from ‘package:base’:
#> as.Date, as.Date.numeric
#> Attaching package: ‘xts’
#> The following objects are masked from ‘package:dplyr’:
#> first, last
#> Attaching package: ‘PerformanceAnalytics’
#> The following object is masked from ‘package:graphics’:
#> legend

# Spearman rank correlations between the land-cover percentage columns of the
# birdlife dataset, computed on pairwise-complete observations.
#
# The previous call, rcorr(as.matrix(..., method = "spearman")), failed with
# 'could not find function "rcorr"': no attached package provides rcorr(), and
# `method` was being passed to as.matrix() rather than to the correlation
# function. stats::cor() is used instead because it needs no extra package.
# NOTE(review): unlike an rcorr()-style result this returns the correlation
# matrix only (no p-values or pair counts).
landcover_columns <- c(
  "closed_forest",
  "cultivated",
  "herbaceous_vegetation",
  "herbaceous_wetland",
  "open_forest",
  "permanent_water",
  "shrubs",
  "urban"
)
stats::cor(
  as.matrix(birdlife[, landcover_columns]),
  method = "spearman",
  use = "pairwise.complete.obs"
)
# Load (and print) the hotspot metrics for the "both" and "either" pools.
# Lines starting with `#>` are console output captured from an earlier run;
# they are kept as comments so the script stays parseable.
both <- load_dataset("both")
#> Rows: 8443 Columns: 26── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): city_name, continent, realm, biome, national_income_group, locality_id
#> dbl (20): percentage_of_regional_pool_present, difference_from_locality_trait_gravity, percentage_of_niches_pre...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
both

either <- load_dataset("either")
#> Rows: 8443 Columns: 26── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (6): city_name, continent, realm, biome, national_income_group, locality_id
#> dbl (20): percentage_of_regional_pool_present, difference_from_locality_trait_gravity, percentage_of_niches_pre...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
either
# Read the per-city input table and prepare it for modelling.
# Lines starting with `#>` are console output captured from an earlier run.
city_data <- read_csv("download_data__input__city_data.csv")
#> Rows: 137 Columns: 89── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (3): name, biome_name, realm
#> dbl (82): population, total_area, city_gdp_per_population, percentage_urban_area_as_open_public_spaces, percent...
#> lgl (4): city_includes_estuary, region_100km_includes_estuary, region_50km_includes_estuary, region_20km_inclu...
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Categorical and logical descriptors are converted to factors.
factor_columns <- c(
  "realm",
  "city_includes_estuary",
  "region_100km_includes_estuary",
  "region_50km_includes_estuary",
  "region_20km_includes_estuary",
  "biome_name"
)
city_data[factor_columns] <- lapply(city_data[factor_columns], as.factor)

# Fill population_growth one city at a time from the row's own values.
# NOTE(review): population_growth() is not defined in the part of the file
# shown here -- it must already be in scope when this loop runs.
# seq_len() keeps the loop from running on a zero-row table (1:0 would
# iterate over rows 1 and 0).
city_data$population_growth <- 0
for (i in seq_len(nrow(city_data))) {
  city_data[i, ]$population_growth <- population_growth(city_data[i, ])
}
city_data